This project consists of a deep neural network capable of determining what kind of traffic sign is presented to it. Specifically, it classifies German traffic signs. For training, validation and testing of the network, the German Traffic Sign Dataset is used.
The first step in creating a neural network is to load the data that will be used for training, validation and testing. In this case, the dataset used is the German Traffic Sign Dataset, which is saved in the folder "TrainData" as pickle files.
# Load pickle module
import pickle
import cv2
import numpy as np

# Paths of the pickled dataset splits.
trainFile = 'TrainData/train.p'
validationFile = 'TrainData/valid.p'
testFile = 'TrainData/test.p'


def _loadSplit(path):
    """Read one pickled dataset split and return the stored dict.

    NOTE: pickle is only safe on trusted files; these are the local dataset
    files shipped with the project, not external input.
    """
    with open(path, mode='rb') as file:
        return pickle.load(file)


trainData = _loadSplit(trainFile)
validationData = _loadSplit(validationFile)
testData = _loadSplit(testFile)

# Split each dict into image features and integer labels.
trainData_X, trainData_y = trainData['features'], trainData['labels']
validationData_X, validationData_y = validationData['features'], validationData['labels']
testData_X, testData_y = testData['features'], testData['labels']

# It verifies if the images and the labels have the same length
assert(len(trainData_X) == len(trainData_y))
assert(len(validationData_X) == len(validationData_y))
assert(len(testData_X) == len(testData_y))
Once the data is loaded, it is time to inspect it, in order to know what we are working with. First, let's extract some important information from it.
import numpy as np

# Basic statistics of the three dataset splits.
trainData_quantity = len(trainData_X)            # number of training samples
validationData_quantity = len(validationData_X)  # number of validation samples
testData_quantity = len(testData_X)              # number of test samples
imagesShape = trainData_X.shape[1:]              # (height, width, channels)
Labels_quantity = np.unique(trainData_y).size    # number of distinct classes

print("Number of training examples =", trainData_quantity)
print("Number of testing examples =", testData_quantity)
print("Number of validation examples =", validationData_quantity)
print("Image data shape =", imagesShape)
print("Number of classes =", Labels_quantity)
Now it is time to visualize the data by printing some of them:
import matplotlib.pyplot as plt
import random
%matplotlib inline
figure1, fig1_axes = plt.subplots(10, 10, figsize =(20,20))
figure1.tight_layout()
figure1.suptitle('100 randomly chosen images from the training data.\nNumbers indicate their label.',
fontsize = 20)
fig1_axes = fig1_axes.flatten()
for ax in fig1_axes:
imgIndex = random.randint(0, trainData_quantity-1)
ax.imshow(trainData_X[imgIndex], cmap = 'gray')
ax.set_title(trainData_y[imgIndex])
ax.axis('off')
figure1.subplots_adjust(top = 0.9, bottom = 0.03)
figure1.savefig('ImgsReport/01_100RandomTrainData')
# Show a 10x10 grid of random validation images, each titled with its label.
figure2, fig2_axes = plt.subplots(10, 10, figsize=(20, 20))
figure2.tight_layout()
figure2.suptitle('100 randomly chosen images from the validation data.\nNumbers indicate their label.',
                 fontsize=20)
fig2_axes = fig2_axes.flatten()
for ax in fig2_axes:
    sampleIdx = random.randint(0, validationData_quantity - 1)
    ax.imshow(validationData_X[sampleIdx], cmap='gray')
    ax.set_title(validationData_y[sampleIdx])
    ax.axis('off')
figure2.subplots_adjust(top=0.9, bottom=0.03)
figure2.savefig('ImgsReport/02_100RandomValidationData')
# Show a 10x10 grid of random test images, each titled with its label.
figure3, fig3_axes = plt.subplots(10, 10, figsize =(20,20))
figure3.tight_layout()
figure3.suptitle('100 randomly chosen images from the test data.\nNumbers indicate their label.',
                 fontsize = 20)
fig3_axes = fig3_axes.flatten()
for ax in fig3_axes:
    imgIndex = random.randint(0, testData_quantity-1)
    # cmap='gray' added for consistency with the train/validation figures
    # above (matplotlib ignores cmap for RGB input, so rendering is unchanged).
    ax.imshow(testData_X[imgIndex], cmap = 'gray')
    ax.set_title(testData_y[imgIndex])
    ax.axis('off')
figure3.subplots_adjust(top = 0.9, bottom = 0.03)
figure3.savefig('ImgsReport/03_100RandomTestData')
By printing 100 randomly chosen images from the train, validation and test data, it can be seen that the images are taken in different light conditions, from different perspectives and at different sharpness levels. Some images have stickers or other perturbations as well. So it is a good dataset to start with.
Let's now analyze how many images are from every class on the three datasets:
# Array from 0 to "Labels_quantity-1" used to determine how many data are from
# every kind of traffic sign on the three datasets.
labelsIdentifier = np.linspace(0, Labels_quantity-1, Labels_quantity)
# FIX: count samples per class with np.bincount instead of np.histogram.
# np.histogram(labels, bins=43) spreads 43 equal-width bins over the value
# range, which only lines up with the integer labels by coincidence;
# np.bincount gives an exact integer count for every label.
trainData_qtLabels = np.bincount(trainData_y, minlength = Labels_quantity)
validationData_qtLabels = np.bincount(validationData_y, minlength = Labels_quantity)
testData_qtLabels = np.bincount(testData_y, minlength = Labels_quantity)

figure4, fig4_axes = plt.subplots(3, 1, figsize =(15,20))
figure4.suptitle('Frequency of different labels on training, validation and test datasets',
                 fontsize = 20)
# One bar chart per split, same x-axis layout for easy comparison.
for ax, counts, title in zip(fig4_axes,
                             (trainData_qtLabels, validationData_qtLabels, testData_qtLabels),
                             ('Training data', 'Validation data', 'Test data')):
    ax.bar(labelsIdentifier, counts)
    ax.set_title(title)
    ax.set_xticks(np.arange(0, Labels_quantity-1, step = 5))
figure4.subplots_adjust(top = 0.93, bottom = 0.20)
figure4.savefig('ImgsReport/04_LabelsFrequency')
From the three plots it can be seen that the images are almost equally distributed across the train, validation and test datasets.
It can also be seen that some signs are more frequent than others. The speed limits from 30 km/h to 80 km/h (labels 1 to 5) are more frequent than the 20 km/h speed limit (label 0). The end of the 80 km/h speed limit (label 6) is also not that frequent. The 100 km/h and 120 km/h speed limits (labels 7 and 8) are also very frequent, but not as much as the lower ones. This can mean that the neural network will be better prepared to recognize speed limit traffic signs between 30 km/h and 80 km/h than the 20 km/h one, because it will have more exposure to them.
A similar situation can occur with the sign "No passing for vehicles over 3.5 metric tons", which is also very frequent (label 10), more than the normal "No passing" sign (label 9).
Another situation like that occurs between the signs "Keep right" (label 38) and "Keep left" (label 39). The "Keep right" sign is far more frequent than the "Keep left" one. That can result in "Keep left" images being recognized as "Keep right", since they are very similar.
The next step is to preprocess the data so it can be used to train, validate and test the neural network. This preprocessing consists of normalizing the images so that their values are between -1 and 1. Note that this processing is done for all three color channels of the images.
This normalization is done in order to get a good conditioned dataset which performs well with the optimizer used to train the neural network. A bad conditioned dataset could result on overflows while doing the optimization.
# Function to normalize images from range (0, 255) to another range. The
# default normalized range is (-1, 1), which centers the value range around 0.
# (Note: this is a linear rescale only — it does NOT standardize the data to
# zero mean / unit standard deviation as the original comment claimed.)
def normalizeRGBImage(image, normRange = (-1, 1)):
    """Linearly rescale 8-bit image data from [0, 255] to ``normRange``.

    Args:
        image: ndarray of intensities in [0, 255] (any shape, all channels).
        normRange: (min, max) of the output range; defaults to (-1, 1).

    Returns:
        float32 ndarray of the same shape, with 0 mapped to normRange[0]
        and 255 mapped to normRange[1].
    """
    inputMin = 0
    inputMax = 255
    slope = (normRange[1] - normRange[0])/(inputMax - inputMin)
    # astype avoids uint8 overflow and removes the two full-size constant
    # matrices the previous version allocated (plus an unused zeros_like).
    return (image.astype(np.float32) - inputMin)*slope + normRange[0]
# Normalize all three splits (replaces the uint8 arrays with float32 arrays
# rescaled to the default (-1, 1) range).
trainData_X = normalizeRGBImage(trainData_X)
validationData_X = normalizeRGBImage(validationData_X)
testData_X = normalizeRGBImage(testData_X)
The next step would be to augment the data doing translations, rotations, zooms, etc. This is done in order to have a more diverse dataset which makes the neural network learn the signs even if the position or size is different than it should be, making it more robust.
In this project this is done later, in order to increase the performance of the selected model; for now the raw dataset will be used.
Yann LeCun's LeNet-5 neural network was selected as a starting point for the neural network architecture. This network was developed by Yann LeCun to classify handwritten numbers from 0 to 9, but it can easily be used for other image classification tasks.
The original LeNet-5 network consisted of the following layers:
Between all internal layers the activation function hyperbolic tangent (tanh) was used.
This structure can be explained with the following image (Original Image published in [LeCun et al., 1998] )
The model was changed in order to be usable for this classification task.
The activation functions were changed from hyperbolic tangent (tanh) to ReLU for two reasons:
In order to avoid overfitting (that the neural network learns so well the training data and has problems while working with new data), dropout is used for the fully connected layer after the convolutional part of the network.
So, the new network architecture looks as follows:
The function "LeNet_V1" implements this network on tensorflow. This function is saved in the file "LeNetV1.py"
# Imports tensorflow
import tensorflow as tf
from LeNetV1 import LeNet_V1
Let's print the shape of the output of this function in order to see if the dimensions are correct. They should be 34799x43.
# Sanity check of the graph output: the logits tensor should be 34799 x 43.
networkOutput = LeNet_V1(trainData_X, 0.75)
print(networkOutput)
It can be seen that the dimensions are as wanted. The function returns 34799 vectors of 43 values corresponding to the probabilities of an image being each of the 43 traffic signs.
Here are defined three TensorFlow placeholders to be used later on the TensorFlow session.
# Placeholder for a batch of 32x32 RGB input images (batch size left dynamic).
X = tf.placeholder(tf.float32, (None, 32, 32, 3))
# Placeholder for the integer class labels of the batch.
Y = tf.placeholder(tf.int32, (None))
# Dropout keep probability: fed < 1.0 during training, 1.0 for evaluation.
keepProb = tf.placeholder(tf.float32)
# It is important to convert the label data to one hot before doing training with it, since Y and the output
# of LeNet_V1 need to have the same shape.
Y_oneHot = tf.one_hot(Y, 43)
Here the three most important hyperparameters used for the training are declared. They will be tuned in order to get the best validation accuracy.
# Number of full passes over the training set.
EPOCHS = 10
# Number of samples processed per optimization step.
BATCH_SIZE = 128
# Step size passed to the Adam optimizer.
LEARNING_RATE = 0.001
# Probability of keeping a unit active in the dropout layer during training.
DROPOUT_KEEP_PROB = 0.5
In this section it is defined, how the network will be trained.
For optimization, instead of using the traditional stochastic gradient descent algorithm which only uses the gradient of all parameters and the learning rate, the Adam Algorithm was used.
The Adam Algorithm described by Diederik Kingma and Jimmy Lei Ba on their 2015 paper "Adam : A method for stochastic optimization" does not use directly the gradient in order to change the parameters. Instead, it uses the first and second moment of the gradients in order to create an update term for the parameters which changes through the time. This algorithm has gained a good acceptance in the last years and is seen as the first option when working with big deep neural networks.
# Build the training graph: logits, softmax cross-entropy loss and the Adam
# optimization step.
nnOut = LeNet_V1(X, keepProb)
# NOTE(review): tf.nn.softmax_cross_entropy_with_logits is deprecated in later
# TF1 releases in favor of the *_v2 variant — confirm against the TF version.
crossEntropy = tf.nn.softmax_cross_entropy_with_logits(labels = Y_oneHot, logits = nnOut)
# Mean cross-entropy over the batch is the scalar loss being minimized.
loss = tf.reduce_mean(crossEntropy)
optimizer = tf.train.AdamOptimizer(learning_rate = LEARNING_RATE)
trainingOperation = optimizer.minimize(loss)
For validation, the accuracy operation is defined:
# Accuracy: fraction of samples whose highest logit matches the one-hot label.
correctPrediction = tf.equal(tf.argmax(nnOut, 1), tf.argmax(Y_oneHot, 1))
accuracyOperation = tf.reduce_mean(tf.cast(correctPrediction, tf.float32))
Now that the model is selected, together with the optimizer, it is time to train it and see how well it performs. This cell also logs the training loss, the training accuracy and the validation accuracy every 50 batches in order to plot them afterwards.
from sklearn.utils import shuffle
from tqdm import tqdm
import math

initializer = tf.global_variables_initializer()

# Logging setup: metrics are recorded every `logBatchStep` batches so they can
# be plotted after training.
logBatchStep = 50
batches = []          # global batch index at every log point
lossBatch = []        # training loss at every log point
trainAccBatch = []    # running training accuracy at every log point
validAccBatch = []    # validation accuracy at every log point
lastTrainAcc = 0
lastValidAcc = 0

with tf.Session() as session:
    session.run(initializer)
    batchCount = int(np.ceil(len(trainData_X)/BATCH_SIZE))
    for epoch_i in range(EPOCHS):
        # FIX: shuffle features and labels TOGETHER. The previous version
        # stored the shuffled features in normTrainData_X while the batches
        # below kept reading the UNSHUFFLED trainData_X, so the (shuffled)
        # labels no longer matched the features they were paired with.
        trainData_X, trainData_y = shuffle(trainData_X, trainData_y)
        normTrainData_X = trainData_X  # kept: later cells reference this name
        # Progress bar:
        barDesc = 'Epoch {:>2}/{}'.format(epoch_i+1, EPOCHS)
        batchesPbar = tqdm(range(batchCount), desc= barDesc, unit='batches')
        # Running sum of per-batch training accuracies for this epoch.
        trainingAccBatchSum = 0
        # Training cycle
        for batch_i in batchesPbar:
            # Slice the current mini-batch of features and labels.
            batchStart = batch_i*BATCH_SIZE
            batchEnd = batchStart + BATCH_SIZE
            batchData_X = trainData_X[batchStart:batchEnd]
            batchData_y = trainData_y[batchStart:batchEnd]
            # One optimization step; also fetch the batch loss.
            trainingFeedDict = {X: batchData_X, Y: batchData_y, keepProb : DROPOUT_KEEP_PROB}
            _ , batchLoss = session.run([trainingOperation, loss], feed_dict= trainingFeedDict)
            # Batch accuracy with dropout disabled (keepProb = 1.0).
            trainingAccFeedDict = {X: batchData_X, Y: batchData_y, keepProb : 1.0}
            batchTrainAcc = session.run(accuracyOperation, feed_dict = trainingAccFeedDict)
            trainingAccBatchSum += batchTrainAcc
            # Logs every `logBatchStep` batches, counted across all epochs.
            # (The redundant re-assignment of logBatchStep inside the loop and
            # the unused lastLoss variable were removed.)
            if not ((epoch_i*len(batchesPbar) + batch_i) % logBatchStep):
                # Training accuracy averaged over the batches run this epoch.
                lastTrainAcc = trainingAccBatchSum / (batch_i + 1)
                # Validation accuracy over the full validation set.
                validationAccFeedDict = {X: validationData_X, Y: validationData_y, keepProb : 1.0}
                lastValidAcc = session.run(accuracyOperation, feed_dict = validationAccFeedDict)
                batchesPbar.set_description('Epoch {:>2}/{} | Loss: {:.2f} | Train Acc.: {:.2f} | Val. Acc.: {:.2f}'
                                            .format(epoch_i+1, EPOCHS, batchLoss, lastTrainAcc, lastValidAcc))
                batchesPbar.refresh()
                # Log data in order to print it afterwards
                batches.append(epoch_i*len(batchesPbar) + batch_i)
                lossBatch.append(batchLoss)
                trainAccBatch.append(lastTrainAcc)
                validAccBatch.append(lastValidAcc)
The model performed very well reaching a training accuracy of 98% and a validation accuracy of 94%. In the next cells these results are analyzed more deeply.
# Plot the logged training loss (top) and accuracies (bottom) of the first run.
figure5, fig5_axes = plt.subplots(2, 1, figsize=(15, 15))
figure5title = (f"Training loss and accuracies with training and validation sets. \n Learning rate: "
                f"{LEARNING_RATE}, Batch size: {BATCH_SIZE}, Epochs: {EPOCHS}"
                f", Dropout keep probability: {DROPOUT_KEEP_PROB}")
figure5.suptitle(figure5title, fontsize=20)
# One x tick per epoch boundary (len(batchesPbar) batches per epoch).
epochTicks = np.arange(0, EPOCHS*len(batchesPbar) + len(batchesPbar), step=len(batchesPbar))
lossAx, accAx = fig5_axes
lossAx.plot(batches, lossBatch)
lossAx.set_xticks(epochTicks)
lossAx.grid()
lossAx.set_xlim(0, EPOCHS*len(batchesPbar))
lossAx.set_title('Training loss')
accAx.plot(batches, trainAccBatch, 'r', label='Training Accuracy')
accAx.plot(batches, validAccBatch, 'g', label='Validation Accuracy')
accAx.plot(batches, np.full_like(batches, 0.93, dtype=np.float32), '--', label='0.93')
accAx.set_xticks(epochTicks)
accAx.set_yticks(np.arange(0, 1 + 0.2, step=0.2))
accAx.set_xlim(0, EPOCHS*len(batchesPbar))
accAx.legend(loc='lower right')
accAx.grid()
accAx.set_title('Accuracy')
figure5.savefig('ImgsReport/05_LeNetV1TrainingResults01')
From the two graphics it can be seen that the network with the given hyperparameters does the biggest part of the training during the first five epochs. At the end of the fifth epoch it reaches already a validation accuracy of 90%. In the next five epochs the validation accuracy grows very slowly but steady. It also can be seen from the loss function that the model was still getting better when the training ended, which can mean that a bigger number of epochs would result in an even higher validation accuracy.
In order to be able to change the hyperparameters easily and show the results here, let's create a function which executes the training and extracts the data, so the new sessions do not have so many code lines. This function is called "trainNetwork" and is saved in the file "LeNetV1.py".
import sys, importlib

# Reload LeNetV1 so the newly added trainNetwork function is picked up without
# restarting the kernel.
importlib.reload(sys.modules['LeNetV1'])
from LeNetV1 import trainNetwork

# Increase EPOCHS by 5
EPOCHS = 15

initializer = tf.global_variables_initializer()
with tf.Session() as session:
    session.run(initializer)
    outputLists = trainNetwork(trainData_X, trainData_y, validationData_X, validationData_y,
                               trainingOperation, loss, accuracyOperation, X, Y, keepProb,
                               epochs = EPOCHS, batchSize = BATCH_SIZE, dropoutKeepProb = DROPOUT_KEEP_PROB)
# Logged metrics: [batch indices, loss, training accuracy, validation accuracy].
batches_2 = outputLists[0]
lossBatch_2 = outputLists[1]
trainAccBatch_2 = outputLists[2]
validAccBatch_2 = outputLists[3]
# Batches per epoch. FIX: computed from trainData_X instead of the leftover
# shuffle temporary normTrainData_X (same length, fewer hidden dependencies).
batchLength = np.ceil(len(trainData_X)/BATCH_SIZE)
# Plot the logged training loss (top) and accuracies (bottom) of the 15-epoch run.
figure6, fig6_axes = plt.subplots(2, 1, figsize=(15, 15))
figure6title = (f"Training loss and accuracies with training and validation sets. \n Learning rate: "
                f"{LEARNING_RATE}, Batch size: {BATCH_SIZE}, Epochs: {EPOCHS}"
                f", Dropout keep probability: {DROPOUT_KEEP_PROB}")
figure6.suptitle(figure6title, fontsize=20)
# One x tick per epoch boundary (batchLength batches per epoch).
epochTicks6 = np.arange(0, EPOCHS*batchLength + batchLength, step=batchLength)
lossAx6, accAx6 = fig6_axes
lossAx6.plot(batches_2, lossBatch_2)
lossAx6.set_xticks(epochTicks6)
lossAx6.grid()
lossAx6.set_xlim(0, EPOCHS*batchLength)
lossAx6.set_title('Training loss')
accAx6.plot(batches_2, trainAccBatch_2, 'r', label='Training Accuracy')
accAx6.plot(batches_2, validAccBatch_2, 'g', label='Validation Accuracy')
accAx6.plot(batches_2, np.full_like(batches_2, 0.93, dtype=np.float32), '--', label='0.93')
accAx6.set_xticks(epochTicks6)
accAx6.set_yticks(np.arange(0, 1 + 0.2, step=0.2))
accAx6.set_xlim(0, EPOCHS*batchLength)
accAx6.legend(loc='lower right')
accAx6.grid()
accAx6.set_title('Accuracy')
figure6.savefig('ImgsReport/06_LeNetV1TrainingResults02')
By looking at the new graphs, it can be seen that the performance of the network didn't really improve by increasing the number of epochs. The last validation accuracy was also 94%, which means that with the given learning rate, the network converges to a solution within the first 10 epochs.
Let's try decreasing the learning rate a bit and keep the epochs by 10. Decreasing the learning rate should make the convergence of the network slower, but it makes it possible that the performance increases a bit.
# Try a smaller learning rate, keeping everything else the same.
EPOCHS = 10
BATCH_SIZE = 128
LEARNING_RATE = 0.0007
DROPOUT_KEEP_PROB = 0.5

# FIX: reassigning the LEARNING_RATE variable alone has no effect — the Adam
# optimizer already in the graph was constructed with the old value baked in.
# Rebuild the optimizer and training op so the new learning rate is used.
optimizer = tf.train.AdamOptimizer(learning_rate = LEARNING_RATE)
trainingOperation = optimizer.minimize(loss)

initializer = tf.global_variables_initializer()
with tf.Session() as session:
    session.run(initializer)
    outputLists = trainNetwork(trainData_X, trainData_y, validationData_X, validationData_y,
                               trainingOperation, loss, accuracyOperation, X, Y, keepProb,
                               epochs = EPOCHS, batchSize = BATCH_SIZE, dropoutKeepProb = DROPOUT_KEEP_PROB)
# Logged metrics: [batch indices, loss, training accuracy, validation accuracy].
batches_3 = outputLists[0]
lossBatch_3 = outputLists[1]
trainAccBatch_3 = outputLists[2]
validAccBatch_3 = outputLists[3]
# Batches per epoch (from trainData_X directly; normTrainData_X is the same
# array under a leftover name from the manual training loop).
batchLength = np.ceil(len(trainData_X)/BATCH_SIZE)
# Plot the logged training loss (top) and accuracies (bottom) of the low-LR run.
figure7, fig7_axes = plt.subplots(2, 1, figsize=(15, 15))
figure7title = (f"Training loss and accuracies with training and validation sets. \n Learning rate: "
                f"{LEARNING_RATE}, Batch size: {BATCH_SIZE}, Epochs: {EPOCHS}"
                f", Dropout keep probability: {DROPOUT_KEEP_PROB}")
figure7.suptitle(figure7title, fontsize=20)
# One x tick per epoch boundary (batchLength batches per epoch).
epochTicks7 = np.arange(0, EPOCHS*batchLength + batchLength, step=batchLength)
lossAx7, accAx7 = fig7_axes
lossAx7.plot(batches_3, lossBatch_3)
lossAx7.set_xticks(epochTicks7)
lossAx7.grid()
lossAx7.set_xlim(0, EPOCHS*batchLength)
lossAx7.set_title('Training loss')
accAx7.plot(batches_3, trainAccBatch_3, 'r', label='Training Accuracy')
accAx7.plot(batches_3, validAccBatch_3, 'g', label='Validation Accuracy')
accAx7.plot(batches_3, np.full_like(batches_3, 0.93, dtype=np.float32), '--', label='0.93')
accAx7.set_xticks(epochTicks7)
accAx7.set_yticks(np.arange(0, 1 + 0.2, step=0.2))
accAx7.set_xlim(0, EPOCHS*batchLength)
accAx7.legend(loc='lower right')
accAx7.grid()
accAx7.set_title('Accuracy')
figure7.savefig('ImgsReport/07_LeNetV1TrainingResults03')
Decreasing the learning rate did not improve the performance of the network, it actually decreased it a little bit (from 94% to 93%).
Since the Adam optimizer is being used, which decreases the learning rate through time, it may be a better option to increase it a bit. Let's set it to 0.002.
# Try a larger learning rate, keeping everything else the same.
EPOCHS = 10
BATCH_SIZE = 128
LEARNING_RATE = 0.002
DROPOUT_KEEP_PROB = 0.5

# FIX: reassigning the LEARNING_RATE variable alone has no effect — the Adam
# optimizer in the graph keeps the value it was constructed with. Rebuild the
# optimizer and training op so the new learning rate is actually used.
optimizer = tf.train.AdamOptimizer(learning_rate = LEARNING_RATE)
trainingOperation = optimizer.minimize(loss)

initializer = tf.global_variables_initializer()
with tf.Session() as session:
    session.run(initializer)
    outputLists = trainNetwork(trainData_X, trainData_y, validationData_X, validationData_y,
                               trainingOperation, loss, accuracyOperation, X, Y, keepProb,
                               epochs = EPOCHS, batchSize = BATCH_SIZE, dropoutKeepProb = DROPOUT_KEEP_PROB)
# Logged metrics: [batch indices, loss, training accuracy, validation accuracy].
batches_4 = outputLists[0]
lossBatch_4 = outputLists[1]
trainAccBatch_4 = outputLists[2]
validAccBatch_4 = outputLists[3]
# Batches per epoch (from trainData_X directly, not the leftover shuffle name).
batchLength = np.ceil(len(trainData_X)/BATCH_SIZE)
# Plot the logged training loss (top) and accuracies (bottom) of the LR=0.002 run.
figure8, fig8_axes = plt.subplots(2, 1, figsize =(15,15))
figure8title = "Training loss and accuracies with training and validation sets. \n Learning rate: " + \
               str(LEARNING_RATE) + ", Batch size: " + str(BATCH_SIZE) + ", Epochs: " + str(EPOCHS) + \
               ", Dropout keep probability: " + str(DROPOUT_KEEP_PROB)
figure8.suptitle(figure8title, fontsize = 20)
fig8_axes[0].plot(batches_4, lossBatch_4)
fig8_axes[0].set_xticks(np.arange(0, EPOCHS*batchLength + batchLength, step = batchLength))
fig8_axes[0].grid()
fig8_axes[0].set_xlim(0, EPOCHS*batchLength)
fig8_axes[0].set_title('Training loss')
fig8_axes[1].plot(batches_4, trainAccBatch_4, 'r', label = 'Training Accuracy')
fig8_axes[1].plot(batches_4, validAccBatch_4, 'g', label = 'Validation Accuracy')
# FIX: the 0.93 reference line was sized with batches_3 (the previous run's
# log) instead of batches_4; use batches_4 so it always matches this run's
# x data regardless of how many points each run logged.
fig8_axes[1].plot(batches_4, np.full_like(batches_4, 0.93, dtype = np.float32), '--', label = '0.93')
fig8_axes[1].set_xticks(np.arange(0, EPOCHS*batchLength + batchLength, step = batchLength))
fig8_axes[1].set_yticks(np.arange(0, 1 + 0.2, step = 0.2))
fig8_axes[1].set_xlim(0, EPOCHS*batchLength)
fig8_axes[1].legend(loc = 'lower right')
fig8_axes[1].grid()
fig8_axes[1].set_title('Accuracy')
figure8.savefig('ImgsReport/08_LeNetV1TrainingResults04')
Increasing the learning rate to 0.002 clearly decreased the performance of the model. It looks like the best learning rate for this model using the Adam optimizer is 0.001. It also looks like the model will not get better by changing the hyperparameters. But what about creating a bigger model?
The model until now is almost the same as the one Yann LeCun used to classify handwritten numbers. Now we are working with images that contain more information, where color is also a factor, and there are also more labels, so it seems logical to build a bigger model than Yann LeCun's.
Let's define the network arquitecture like following:
As seen, the network has a new convolutional layer and a new fully connected layer. The new convolutional layer should recognize more abstract features on the images like characters or complex symbols. Every convolutional layer has also more feature maps, resulting on more shapes recognized on every layer. The new fully connected layer makes it possible to classify those more complex feature maps of the new convolutional layer.
There is also a negative aspect: the original neural network has 64811 parameters, this new one will have 655761 which is ten times the original one, this is important to know since a very big model can result on memory problems.
The new function is called "LeNet_Improved" and is located in the file "LeNetV1.py"
import sys, importlib
from LeNetV1 import LeNet_Improved
from LeNetV1 import trainNetwork

# Hyperparameters for the first run of the bigger LeNet_Improved model.
EPOCHS = 10
BATCH_SIZE = 128
LEARNING_RATE = 0.001
DROPOUT_KEEP_PROB = 0.5

# Build a separate graph head for the improved model: logits, loss, Adam
# training step and accuracy, reusing the X / Y / keepProb placeholders.
nnOut_Imp = LeNet_Improved(X, keepProb)
crossEntropy_Imp = tf.nn.softmax_cross_entropy_with_logits(labels = Y_oneHot, logits = nnOut_Imp)
lossOperation_Imp = tf.reduce_mean(crossEntropy_Imp)
optimizer_Imp = tf.train.AdamOptimizer(learning_rate = LEARNING_RATE)
trainingOperation_Imp = optimizer_Imp.minimize(lossOperation_Imp)
correctPrediction_Imp = tf.equal(tf.argmax(nnOut_Imp, 1), tf.argmax(Y_oneHot, 1))
accuracyOperation_Imp = tf.reduce_mean(tf.cast(correctPrediction_Imp, tf.float32))

initializer = tf.global_variables_initializer()
with tf.Session() as session:
    session.run(initializer)
    outputLists = trainNetwork(trainData_X, trainData_y, validationData_X, validationData_y,
                               trainingOperation_Imp, lossOperation_Imp, accuracyOperation_Imp, X, Y, keepProb,
                               epochs = EPOCHS, batchSize = BATCH_SIZE, dropoutKeepProb = DROPOUT_KEEP_PROB)
# Logged metrics: [batch indices, loss, training accuracy, validation accuracy].
batches_Imp = outputLists[0]
lossBatch_Imp = outputLists[1]
trainAccBatch_Imp = outputLists[2]
validAccBatch_Imp = outputLists[3]
# Batches per epoch. FIX: computed from trainData_X instead of the leftover
# shuffle temporary normTrainData_X; dead commented-out debug lines removed.
batchLength = np.ceil(len(trainData_X)/BATCH_SIZE)
# Plot the logged training loss (top) and accuracies (bottom) of the improved model.
figure9, fig9_axes = plt.subplots(2, 1, figsize=(15, 15))
figure9title = (f"Training loss and accuracies with training and validation sets. \n Learning rate: "
                f"{LEARNING_RATE}, Batch size: {BATCH_SIZE}, Epochs: {EPOCHS}"
                f", Dropout keep probability: {DROPOUT_KEEP_PROB}")
figure9.suptitle(figure9title, fontsize=20)
# One x tick per epoch boundary (batchLength batches per epoch).
epochTicks9 = np.arange(0, EPOCHS*batchLength + batchLength, step=batchLength)
lossAx9, accAx9 = fig9_axes
lossAx9.plot(batches_Imp, lossBatch_Imp)
lossAx9.set_xticks(epochTicks9)
lossAx9.grid()
lossAx9.set_xlim(0, EPOCHS*batchLength)
lossAx9.set_title('Training loss')
accAx9.plot(batches_Imp, trainAccBatch_Imp, 'r', label='Training Accuracy')
accAx9.plot(batches_Imp, validAccBatch_Imp, 'g', label='Validation Accuracy')
accAx9.plot(batches_Imp, np.full_like(batches_Imp, 0.93, dtype=np.float32), '--', label='0.93')
accAx9.set_xticks(epochTicks9)
accAx9.set_yticks(np.arange(0, 1 + 0.2, step=0.2))
accAx9.set_xlim(0, EPOCHS*batchLength)
accAx9.legend(loc='lower right')
accAx9.grid()
accAx9.set_title('Accuracy')
figure9.savefig('ImgsReport/09_LeNetImprovedTrainingResults01')
It can be seen that this network performs slightly better, with a validation accuracy of 94% and sometimes 95%. It also converges much faster than the first network, and the training accuracy reaches 100%. Let's decrease the learning rate to 0.0005 to see what happens.
# Try the improved model with a smaller learning rate.
EPOCHS = 10
BATCH_SIZE = 128
LEARNING_RATE = 0.0005
DROPOUT_KEEP_PROB = 0.5

# FIX: rebuild the improved model's training op so the new learning rate is
# actually used — the previous trainingOperation_Imp was created with 0.001
# baked into its AdamOptimizer, so reassigning the variable alone did nothing.
optimizer_Imp = tf.train.AdamOptimizer(learning_rate = LEARNING_RATE)
trainingOperation_Imp = optimizer_Imp.minimize(lossOperation_Imp)

initializer = tf.global_variables_initializer()
with tf.Session() as session:
    session.run(initializer)
    outputLists = trainNetwork(trainData_X, trainData_y, validationData_X, validationData_y,
                               trainingOperation_Imp, lossOperation_Imp, accuracyOperation_Imp, X, Y, keepProb,
                               epochs = EPOCHS, batchSize = BATCH_SIZE, dropoutKeepProb = DROPOUT_KEEP_PROB)
# Logged metrics: [batch indices, loss, training accuracy, validation accuracy].
batches_Imp_2 = outputLists[0]
lossBatch_Imp_2 = outputLists[1]
trainAccBatch_Imp_2 = outputLists[2]
validAccBatch_Imp_2 = outputLists[3]
# Batches per epoch (from trainData_X directly, not the leftover shuffle name).
batchLength = np.ceil(len(trainData_X)/BATCH_SIZE)
# Plot the logged training loss (top) and accuracies (bottom) of the LR=0.0005 run.
figure10, fig10_axes = plt.subplots(2, 1, figsize =(15,15))
figure10title = "Training loss and accuracies with training and validation sets. \n Learning rate: " + \
                str(LEARNING_RATE) + ", Batch size: " + str(BATCH_SIZE) + ", Epochs: " + str(EPOCHS) + \
                ", Dropout keep probability: " + str(DROPOUT_KEEP_PROB)
figure10.suptitle(figure10title, fontsize = 20)
fig10_axes[0].plot(batches_Imp_2, lossBatch_Imp_2)
fig10_axes[0].set_xticks(np.arange(0, EPOCHS*batchLength + batchLength, step = batchLength))
fig10_axes[0].grid()
fig10_axes[0].set_xlim(0, EPOCHS*batchLength)
fig10_axes[0].set_title('Training loss')
fig10_axes[1].plot(batches_Imp_2, trainAccBatch_Imp_2, 'r', label = 'Training Accuracy')
fig10_axes[1].plot(batches_Imp_2, validAccBatch_Imp_2, 'g', label = 'Validation Accuracy')
fig10_axes[1].plot(batches_Imp_2, np.full_like(batches_Imp_2, 0.93, dtype = np.float32), '--', label = '0.93')
fig10_axes[1].set_xticks(np.arange(0, EPOCHS*batchLength + batchLength, step = batchLength))
fig10_axes[1].set_yticks(np.arange(0, 1 + 0.2, step = 0.2))
fig10_axes[1].set_xlim(0, EPOCHS*batchLength)
fig10_axes[1].legend(loc = 'lower right')
fig10_axes[1].grid()
fig10_axes[1].set_title('Accuracy')
# FIX: this figure was saved as '09_LeNetImprovedTrainingResults01', silently
# overwriting figure9's file; save it under its own name instead.
figure10.savefig('ImgsReport/10_LeNetImprovedTrainingResults02')
Decreasing the learning rate to 0.0005 really improved the performance of the model. Now it achieves a steady validation accuracy of 95% and even reached 96% once. However, it is only 1% more. It seems that the performance will not get any better without changing the data.
Since the validation accuracy of the model(s) is not getting better with the normal data, it was decided to augment it. Three methods were selected to augment the data: translation, rotation and zooming. For that, three functions were created: translateImage, rotateImage and zoomImage respectively. These functions use the OpenCV library and are located in the file "DataAugmentation.py".
Let's see what the functions can do applying it to a test image.
# Demonstrate each augmentation function on a single training image.
from DataAugmentation import translateImage
from DataAugmentation import rotateImage
from DataAugmentation import zoomImage
import matplotlib.pyplot as plt
%matplotlib inline
# Arbitrary sample image used to demonstrate the augmentation functions.
augmentationTestImage = trainData_X[15732]
# Translations: translateImage(image, shift_x, shift_y) in pixels.
translatedTestImage_1 = translateImage(augmentationTestImage, -10, 10)
translatedTestImage_2 = translateImage(augmentationTestImage, 5, -5)
translatedTestImage_3 = translateImage(augmentationTestImage, 3, 3)
# Rotations: rotateImage(image, angle) in degrees — sign convention defined
# in DataAugmentation.py; TODO confirm direction.
rotatedTestImage_1 = rotateImage(augmentationTestImage, 20)
rotatedTestImage_2 = rotateImage(augmentationTestImage, -20)
rotatedTestImage_3 = rotateImage(augmentationTestImage, 5)
# Zooms: zoomImage(image, pixels) — per the figure titles below, positive
# zooms in and negative zooms out.
zoomedTestImage_1 = zoomImage(augmentationTestImage, 2)
zoomedTestImage_2 = zoomImage(augmentationTestImage, -2)
zoomedTestImage_3 = zoomImage(augmentationTestImage, 4)
# Show the nine augmented variants in a 3x3 grid: translations on the first
# row, rotations on the second, zooms on the third.
figure11, fig11_axes = plt.subplots(3, 3, figsize=(15, 15))
figure11.suptitle('Image augmentation', fontsize=20)
panels = [
    (translatedTestImage_1, 'Translated x-10, y+10'),
    (translatedTestImage_2, 'Translated x+5, y-5'),
    (translatedTestImage_3, 'Translated x+3, y+3'),
    (rotatedTestImage_1, 'Rotated 20°'),
    (rotatedTestImage_2, 'Rotated -20°'),
    (rotatedTestImage_3, 'Rotated 5°'),
    (zoomedTestImage_1, 'Zoomed in 2 pixels'),
    (zoomedTestImage_2, 'Zoomed out 2 pixels'),
    (zoomedTestImage_3, 'Zoomed in 4 pixels'),
]
# flatten() walks the axes row by row, matching the panel order above.
for ax, (img, title) in zip(fig11_axes.flatten(), panels):
    ax.imshow(img)
    ax.set_title(title)
figure11.savefig('ImgsReport/11_ImageAugmentationTestImage')
It can be seen that the three functions do a good job applying the desired operation on the test image. They also conserve the shape of the original image, which is very important in order to use them for the training of the model.
In order to augment every image of the training dataset, the function "augmentData" was created (located in the file "DataAugmentation.py"), which receives as input the original data and labels and gives as output the augmented versions. It has 4 parameters which are chosen carefully, thinking about what the neural network would receive as input in real life. So the translation was restricted to 5 pixels in any direction, the rotation to 15 degrees to the right or to the left, and the zooming to a range between -4 and 4.
Let's use it to augment the training set:
# Augment the training set with randomly translated, rotated and zoomed
# copies of the original images (see "DataAugmentation.py").
# NOTE(review): the limits used here (8 px, 25 deg, zoom 2) differ from the
# 5 px / 15 deg / zoom 4 values quoted in the text above -- confirm which is intended.
from DataAugmentation import augmentData
trainData_X, trainData_y = augmentData(trainData_X, trainData_y,
maxTranslationX = 8, maxTranslationY = 8,
maxRotation = 25, maxZoom = 2)
# The augmented set should be twice the size of the original one.
print(trainData_X.shape)
print(trainData_y.shape)
import matplotlib.pyplot as plt
import random
%matplotlib inline
figure12, fig12_axes = plt.subplots(10, 10, figsize =(20,20))
figure12.tight_layout()
figure12.suptitle('100 randomly chosen images from the augmented training data.\nNumbers indicate their label.',
fontsize = 20)
fig12_axes = fig12_axes.flatten()
for ax in fig12_axes:
imgIndex = random.randint(trainData_quantity, 2*trainData_quantity-1)
ax.imshow(trainData_X[imgIndex], cmap = 'gray')
ax.set_title(trainData_y[imgIndex])
ax.axis('off')
figure12.subplots_adjust(top = 0.9, bottom = 0.03)
figure12.savefig('ImgsReport/12_100RandomAugmentedTrainData')
From visualising the newly augmented data, it can be seen that rotated, zoomed and translated images are now present in the dataset.
It can be seen that the training set is now 2 times bigger.
Now it needs to be normalized again. In order to do that, a new normalization function needed to be programmed because of memory issues.
# Memory-frugal normalization for the (doubled) augmented training set.
def normalizeRGBImage2(image, normRange = (0, 1)):
    """Rescale a batch of RGB images from the range [0, 255] to normRange.

    Parameters
    ----------
    image : np.ndarray
        Batch of images, shape (N, H, W, C), pixel values in [0, 255].
    normRange : tuple, optional
        Target (min, max) output range. Default is (0, 1).
        (The original header comment claimed (-1, 1); the actual default
        has always been (0, 1).)

    Returns
    -------
    np.ndarray
        float32 array of the same shape, linearly rescaled to normRange.
    """
    inputMin = 0
    inputMax = 255
    slope = (normRange[1] - normRange[0])/(inputMax - inputMin)
    # Single float32 copy of the input, then scale/shift in place. This keeps
    # peak memory at one full-size buffer (the reason this second normalization
    # function exists) while avoiding the per-image constant matrices and
    # temporaries of the previous per-slice loop.
    normalizedImage = image.astype(np.float32)
    normalizedImage -= inputMin
    normalizedImage *= slope
    normalizedImage += normRange[0]
    return normalizedImage
# Rescale all three datasets from [0, 255] to the default range [0, 1].
trainData_X = normalizeRGBImage2(trainData_X)
validationData_X = normalizeRGBImage2(validationData_X)
testData_X = normalizeRGBImage2(testData_X)
import sys, importlib
# Reload helper kept for iterative development in the notebook.
# importlib.reload(sys.modules['LeNetV1'])
from LeNetV1 import LeNet_Improved
from LeNetV1 import trainNetwork
# Smoke test of the architecture with a fixed keep probability of 0.75.
# NOTE(review): called with a numpy array instead of a placeholder --
# presumably just to print the output tensor and check shapes; confirm.
print(LeNet_Improved(trainData_X, 0.75))
import tensorflow as tf
# Placeholders: input images (32x32 RGB), integer class labels, and the
# dropout keep probability (fed as 1.0 during evaluation).
X = tf.placeholder(tf.float32, (None, 32, 32, 3))
Y = tf.placeholder(tf.int32, (None))
keepProb = tf.placeholder(tf.float32)
# It is important to convert the label data to one hot before doing training with it, since Y and the output
# of LeNet_V1 need to have the same shape.
Y_oneHot = tf.one_hot(Y, 43)
# Hyperparameters for the first training run on the augmented data.
EPOCHS = 10
BATCH_SIZE = 50
LEARNING_RATE = 0.0005
DROPOUT_KEEP_PROB = 0.5
# Build the network and training graph: softmax cross-entropy loss,
# Adam optimizer, and an accuracy metric from the argmax prediction.
nnOut_Aug = LeNet_Improved(X, keepProb)
crossEntropy_Aug = tf.nn.softmax_cross_entropy_with_logits(labels = Y_oneHot, logits = nnOut_Aug)
lossOperation_Aug = tf.reduce_mean(crossEntropy_Aug)
optimizer_Aug = tf.train.AdamOptimizer(learning_rate = LEARNING_RATE)
trainingOperation_Aug = optimizer_Aug.minimize(lossOperation_Aug)
correctPrediction_Aug = tf.equal(tf.argmax(nnOut_Aug, 1), tf.argmax(Y_oneHot, 1))
accuracyOperation_Aug = tf.reduce_mean(tf.cast(correctPrediction_Aug, tf.float32))
initializer = tf.global_variables_initializer()
# NOTE(review): '.ckpk' looks like a typo of the conventional '.ckpt', but the
# same string is used for both save and restore below, so it works as-is.
saveFile = 'LeNet_Improved.ckpk'
saver = tf.train.Saver()
# Train the model and keep the per-batch loss/accuracy histories for plotting.
with tf.Session() as session:
session.run(initializer)
outputLists = trainNetwork(trainData_X, trainData_y, validationData_X, validationData_y,
trainingOperation_Aug, lossOperation_Aug, accuracyOperation_Aug, X, Y, keepProb,
epochs = EPOCHS, batchSize = BATCH_SIZE, dropoutKeepProb = DROPOUT_KEEP_PROB)
# trainNetwork returns [batch indices, losses, training acc., validation acc.]
batches_Aug = outputLists[0]
lossBatch_Aug = outputLists[1]
trainAccBatch_Aug = outputLists[2]
validAccBatch_Aug = outputLists[3]
# Persist the trained weights so later cells can restore them.
saver.save(session, saveFile)
print("Model saved")
# Batches per epoch, used to place one x tick per epoch boundary.
batchLength = np.ceil(len(trainData_X)/BATCH_SIZE)
figure13, fig13_axes = plt.subplots(2, 1, figsize =(15,15))
figure13title = ("Training loss and accuracies with training and validation sets. \n Learning rate: "
                 + str(LEARNING_RATE) + ", Batch size: " + str(BATCH_SIZE) + ", Epochs: " + str(EPOCHS)
                 + ", Dropout keep probability: " + str(DROPOUT_KEEP_PROB))
figure13.suptitle(figure13title, fontsize = 20)
epochTicks = np.arange(0, EPOCHS*batchLength + batchLength, step = batchLength)
lossAxis, accuracyAxis = fig13_axes
# Top plot: training loss per batch.
lossAxis.plot(batches_Aug, lossBatch_Aug)
lossAxis.set_xticks(epochTicks)
lossAxis.grid()
lossAxis.set_xlim(0, EPOCHS*batchLength)
lossAxis.set_title('Training loss')
# Bottom plot: training/validation accuracy with a dashed 0.93 reference line.
accuracyAxis.plot(batches_Aug, trainAccBatch_Aug, 'r', label = 'Training Accuracy')
accuracyAxis.plot(batches_Aug, validAccBatch_Aug, 'g', label = 'Validation Accuracy')
accuracyAxis.plot(batches_Aug, np.full_like(batches_Aug, 0.93, dtype = np.float32), '--', label = '0.93')
accuracyAxis.set_xticks(epochTicks)
accuracyAxis.set_yticks(np.arange(0, 1 + 0.2, step = 0.2))
accuracyAxis.set_xlim(0, EPOCHS*batchLength)
accuracyAxis.legend(loc = 'lower right')
accuracyAxis.grid()
accuracyAxis.set_title('Accuracy')
figure13.savefig('ImgsReport/13_LeNetImprovedTrainingResultsAug01')
The model reached 95% accuracy, like before without the augmented data. But here it can be seen that the model could still learn a little more if trained for more epochs. Let's increase the number of epochs to 20 and see what happens.
# Second training run: only EPOCHS changed (10 -> 20). The graph built above
# is reused, so BATCH_SIZE/LEARNING_RATE/DROPOUT_KEEP_PROB are repeated here
# for the plot title only; reassigning LEARNING_RATE cannot change the
# optimizer, which was already constructed with that value.
EPOCHS = 20
BATCH_SIZE = 50
LEARNING_RATE = 0.0005
DROPOUT_KEEP_PROB = 0.5
with tf.Session() as session:
session.run(initializer)
outputLists = trainNetwork(trainData_X, trainData_y, validationData_X, validationData_y,
trainingOperation_Aug, lossOperation_Aug, accuracyOperation_Aug, X, Y, keepProb,
epochs = EPOCHS, batchSize = BATCH_SIZE, dropoutKeepProb = DROPOUT_KEEP_PROB)
batches_Aug = outputLists[0]
lossBatch_Aug = outputLists[1]
trainAccBatch_Aug = outputLists[2]
validAccBatch_Aug = outputLists[3]
# Overwrites the checkpoint from the 10-epoch run.
saver.save(session, saveFile)
print("Model saved")
# Batches per epoch, used to place one x tick per epoch boundary.
batchLength = np.ceil(len(trainData_X)/BATCH_SIZE)
figure14, fig14_axes = plt.subplots(2, 1, figsize =(15,15))
figure14title = ("Training loss and accuracies with training and validation sets. \n Learning rate: "
                 + str(LEARNING_RATE) + ", Batch size: " + str(BATCH_SIZE) + ", Epochs: " + str(EPOCHS)
                 + ", Dropout keep probability: " + str(DROPOUT_KEEP_PROB))
figure14.suptitle(figure14title, fontsize = 20)
epochTicks = np.arange(0, EPOCHS*batchLength + batchLength, step = batchLength)
lossAxis, accuracyAxis = fig14_axes
# Top plot: training loss per batch.
lossAxis.plot(batches_Aug, lossBatch_Aug)
lossAxis.set_xticks(epochTicks)
lossAxis.grid()
lossAxis.set_xlim(0, EPOCHS*batchLength)
lossAxis.set_title('Training loss')
# Bottom plot: training/validation accuracy with a dashed 0.93 reference line.
accuracyAxis.plot(batches_Aug, trainAccBatch_Aug, 'r', label = 'Training Accuracy')
accuracyAxis.plot(batches_Aug, validAccBatch_Aug, 'g', label = 'Validation Accuracy')
accuracyAxis.plot(batches_Aug, np.full_like(batches_Aug, 0.93, dtype = np.float32), '--', label = '0.93')
accuracyAxis.set_xticks(epochTicks)
accuracyAxis.set_yticks(np.arange(0, 1 + 0.2, step = 0.2))
accuracyAxis.set_xlim(0, EPOCHS*batchLength)
accuracyAxis.legend(loc = 'lower right')
accuracyAxis.grid()
accuracyAxis.set_title('Accuracy')
figure14.savefig('ImgsReport/14_LeNetImprovedTrainingResultsAug02')
By increasing the epochs to 20 the validation accuracy increased to 96%. Here the training accuracy reached 99%, which means it is not probable that the model performance gets better by increasing the number of epochs.
It seems that the model will not get any better with this dataset, probably because some images are very dark and therefore difficult to identify. If that is the problem, a good idea would be to increase the contrast or the brightness of the images, but that would mean that this calibration would also be necessary on the system where the model would be used, which may not be possible. Therefore this is the definitive version of the model on this project.
Now it is time to calculate the test accuracy, which is the real quality indicator of the model. For that, the function "testNetwork" in the file "LeNetV1.py" was programmed.
import sys, importlib
# Reload the module in case it was edited since the first import.
importlib.reload(sys.modules['LeNetV1'])
from LeNetV1 import testNetwork
saver = tf.train.Saver()
saveFile = 'LeNet_Improved.ckpk'
# Restore the trained weights and evaluate once on the held-out test set.
with tf.Session() as session:
saver.restore(session, saveFile)
testAccuracy = testNetwork(testData_X, testData_y, accuracyOperation_Aug, X, Y, keepProb, batchSize = BATCH_SIZE)
print('The model reached a test accuracy of: ', testAccuracy)
The test accuracy obtained by the model was 95%, very close to the validation accuracy of 96%. This is a very good value, since the test dataset consists of images that were never seen by the model, and therefore the test accuracy is usually smaller than the validation accuracy.
In order to test the model further, 5 images from internet were selected. They are saved in the folder "NewImages"
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import numpy as np
import cv2
# Five traffic-sign photos from the internet, used as an extra informal test.
keepLeftImg = mpimg.imread('NewImages/Image1.jpg')
zone30Img = mpimg.imread('NewImages/Image2.jpg')
stopSnowImg = mpimg.imread('NewImages/Image3.jpg')
flyingDeerImg = mpimg.imread('NewImages/Image4.jpg')
electronic60Img = mpimg.imread('NewImages/Image5.jpg')
These images first need to be resized to 32x32x3 in order to be used as input for the model. For that, the function "resize" of OpenCV is used.
import cv2
def normalizeRGBImage(image, normRange = (-1, 1)):
    """Linearly rescale an RGB image from the range [0, 255] to normRange.

    Parameters
    ----------
    image : np.ndarray
        Image array with pixel values in [0, 255] (any shape).
    normRange : tuple, optional
        Target (min, max) output range. Default is (-1, 1).

    Returns
    -------
    np.ndarray
        float32 array of the same shape, rescaled to normRange.
    """
    inputMin = 0
    inputMax = 255
    slope = (normRange[1] - normRange[0])/(inputMax - inputMin)
    # Cast to float32 explicitly so integer inputs are not truncated; the
    # original pre-allocated a zeros_like() buffer (same dtype as the input)
    # that was immediately rebound and never used -- dead code, removed.
    return (image.astype(np.float32) - inputMin)*slope + normRange[0]
# Resize each photo to the 32x32 network input size and rescale the pixel
# values to [0, 1], matching the normalization applied to the training data.
keepLeftImgResized = normalizeRGBImage(cv2.resize(keepLeftImg, (32,32)), normRange = (0,1))
zone30ImgResized = normalizeRGBImage(cv2.resize(zone30Img, (32,32)), normRange = (0,1))
stopSnowImgResized = normalizeRGBImage(cv2.resize(stopSnowImg, (32,32)), normRange = (0,1))
flyingDeerImgResized = normalizeRGBImage(cv2.resize(flyingDeerImg, (32,32)), normRange = (0,1))
electronic60ImgResized = normalizeRGBImage(cv2.resize(electronic60Img, (32,32)), normRange = (0,1))
figure15, fig15_axes = plt.subplots(5, 2, figsize =(10,20))
figure15.suptitle('Selected new images original and resized', fontsize = 20)
# One row per sign: original photo on the left, resized 32x32 version right.
imagePairs = [
    (keepLeftImg, keepLeftImgResized),
    (zone30Img, zone30ImgResized),
    (stopSnowImg, stopSnowImgResized),
    (flyingDeerImg, flyingDeerImgResized),
    (electronic60Img, electronic60ImgResized),
]
for row, (originalImg, resizedImg) in enumerate(imagePairs):
    fig15_axes[row, 0].imshow(originalImg)
    fig15_axes[row, 1].imshow(resizedImg)
figure15.savefig('ImgsReport/15_SelectedNewImages')
Now let's get the predictions of the model:
# Stack the five preprocessed images into a single batch for the network.
newImages = np.zeros((5,32,32,3), dtype = np.float32)
newImages[0,:,:,:] = keepLeftImgResized
newImages[1,:,:,:] = zone30ImgResized
newImages[2,:,:,:] = stopSnowImgResized
newImages[3,:,:,:] = flyingDeerImgResized
newImages[4,:,:,:] = electronic60ImgResized
# keepProb is fed as 1.0: no dropout at inference time.
# NOTE(review): the softmax is evaluated in two separate session.run calls;
# harmless, but a single run could fetch both the probabilities and the top-5.
with tf.Session() as session:
saver.restore(session, saveFile)
predictions = session.run(tf.nn.softmax(nnOut_Aug), feed_dict = {X: newImages, keepProb: 1.0})
top_5 = session.run(tf.nn.top_k(tf.nn.softmax(nnOut_Aug),5), feed_dict = {X: newImages, keepProb: 1.0})
import pandas as pd
# Build one small table per image holding its 5 most probable labels.
dataFrame = []
dfColumns = ["Label","Probability"]
t5probabilities = top_5[0]
t5labels = top_5[1]
for imageLabels, imageProbs in zip(t5labels, t5probabilities):
    df = pd.DataFrame(columns=dfColumns)
    # Row-by-row loc assignment keeps the original (object) column dtypes.
    for rank, (label, probability) in enumerate(zip(imageLabels, imageProbs)):
        df.loc[rank] = [str(label), probability]
    dataFrame.append(df)
Following is a table of the 5 highest probabilities for the image 1 "keep left".
print(dataFrame[0])
# x axis positions: one bar per class label, 0..42.
labelsIdentifier = np.linspace(0, 42, 43)
figure16, fig16_axes = plt.subplots(figsize =(15,10))
figure16.suptitle('Prediction of image "keep left"', fontsize = 20)
fig16_axes.bar(labelsIdentifier, predictions[0])
tickPositions = np.arange(0, 42, step = 2)
fig16_axes.set_xticks(tickPositions)
figure16.savefig('ImgsReport/16_PredictionImageKeepLeft')
It can be seen that the model is almost completely sure about this prediction: it believes with 99.92% probability that the image has label 39, which means "keep left". This is correct, so the model was right here.
The next probability is 0.44% for the label 33 (Turn right ahead) which is similar to the traffic sign "keep left" having also an arrow but pointing to the right.
Another similarly high probability is 0.32% for label 37 (Go straight or left). This sign also has an arrow pointing to the left, like the "keep left" sign.
The other two probabilities are very low 0.0029% for the label 40 (Roundabout mandatory) and 0.00042% for the label 35 (Ahead only). These images are also blue and have arrows, but in other directions. It seems that the background color of the sign is a very important feature in order to classify them. But more important in this case are the arrows and their orientation.
The label 38, corresponding to the "keep right" sign, was not among the 5 highest probabilities, as one could have expected from analysing the distribution of the dataset, where the keep right sign appears much more often than the keep left one.
The next image is a 30 Km/h speed limit sign with a small additional sign at the bottom.
print(dataFrame[1])
# Full probability distribution over all 43 classes for this image.
figure17, fig17_axes = plt.subplots(figsize =(15,10))
figure17.suptitle('Prediction of image "Speed limit 30km/h"', fontsize = 20)
fig17_axes.bar(labelsIdentifier, predictions[1])
tickPositions = np.arange(0, 42, step = 2)
fig17_axes.set_xticks(tickPositions)
figure17.savefig('ImgsReport/17_PredictionImageSpeedLimit30')
For this image the model is very sure that the image is 30 km/h, with a probability of 99.99999% of being label 1 (Speed limit 30 km/h). The next probability, of 0.000008%, is for the label 0 (20 km/h). The other probabilities are extremely low, for label 2 (Speed limit 50 km/h), label 14 (Stop) and label 3 (Speed limit 60 km/h) in decreasing order. It is interesting to remark that the stop sign is more probable than other speed limit signs, which could indicate that the text in the middle of the image is more important for the model than the background color. It seems that the model recognizes very well the difference between 2, 3 and 5.
This image is a stop sign with snow on it.
print(dataFrame[2])
# Full probability distribution over all 43 classes for this image.
figure18, fig18_axes = plt.subplots(figsize =(15,10))
figure18.suptitle('Prediction of image "Stop sign with snow"', fontsize = 20)
fig18_axes.bar(labelsIdentifier, predictions[2])
tickPositions = np.arange(0, 42, step = 2)
fig18_axes.set_xticks(tickPositions)
figure18.savefig('ImgsReport/18_PredictionImageStopWithSnow')
The results for this image are very interesting because the model got it wrong here. It predicted with 99.97% probability that the image is of label 15, which means "no vehicles". This sign has a red border and a white center, similar to what the snow-covered stop sign looks like. The next probability is of 0.02% for the correct label, which is 14 (Stop sign). The other probabilities are very low, for the labels 13 (Yield), 9 (No passing) and 2 (Speed limit 50 km/h) in decreasing order. All these signs have a white background. With this data it can be said again that the background color is a very important feature for the model when classifying.
The model may not have had a chance to get this answer right, because it had probably never seen a snow-covered stop sign during training.
The next image is a "wild animals crossing" sign with the peculiarity that the deer on it has wings (maybe because someone drew them on).
print(dataFrame[3])
# Full probability distribution over all 43 classes for this image.
figure19, fig19_axes = plt.subplots(figsize =(15,10))
figure19.suptitle('Prediction of image "Wild animals crossing (with wings)"', fontsize = 20)
fig19_axes.bar(labelsIdentifier, predictions[3])
tickPositions = np.arange(0, 42, step = 2)
fig19_axes.set_xticks(tickPositions)
figure19.savefig('ImgsReport/19_PredictionImageFlyingDeer')
Here the model is also very certain about its prediction. With 99.953% probability it predicts that the image is a "wild animals crossing" sign (label 31). The next probability is of 0.0257% for label 21 (Double curve); this sign has a similar drawing on it with almost the same form. The next probability is 0.0211% for label 23 (Slippery road); this sign has a car drawing and two drawings of curved lines on it, which have the same orientation as the double curve on sign 21. The next probabilities are very low, for the labels 29 (Bicycles crossing) and 25 (Road work) respectively; these signs are also red triangles with white background and black symbols on them, but the symbols have completely different shapes.
The last image is a 60 km/h speed limit sign with black background because it is electronic.
print(dataFrame[4])
# Full probability distribution over all 43 classes for this image.
figure20, fig20_axes = plt.subplots(figsize =(15,10))
figure20.suptitle('Prediction of image "Speed limit 60 km/h (electronic)"', fontsize = 20)
fig20_axes.bar(labelsIdentifier, predictions[4])
tickPositions = np.arange(0, 42, step = 2)
fig20_axes.set_xticks(tickPositions)
figure20.savefig('ImgsReport/20_PredictionImageSpeedLimit60Elect')
This is the only image where the model had real doubts. It predicted it well, but with a probability of "only" 81% (label 3). The next probability is of 12% for label 2 (Speed limit 50 km/h). After that, the next one is 3% for label 5 (80 km/h). All these signs are speed limit signs. The next probability is of 2% for the stop sign; again, it may be recognized as an alternative because of the text, especially since the 6 and 0 look similar to S and O. The last probability is 0.7% for label 1, which is speed limit 30 km/h.
The reason why the probabilities are more spread out for this image is surely not the background color; it is rather the thickness of the characters. For this electronic sign the 60 is thinner than on the normal speed signs, which makes it more difficult to identify after the image was resized to 32x32, as can be seen above.
In order to visualize the outputs of the convolutional layers and with that know more deeply what the model finds important, it is needed to extract the tensors which execute the convolution from the saved model. In order to do that, the tensors conv1out, conv2out and conv3out of the function "LeNet_Improved" are needed. In order to get them, the operation "tf.get_default_graph().get_tensor_by_name" is used. This method takes as argument the name of a tensor. Since for these tensors no names were defined at the moment the model was trained, they are called by the default names "BiasAdd", "BiasAdd_1" and "BiasAdd_2", since they are bias add operations.
def outputFeatureMap(image_input, tf_activation, X_ph, activation_min=-1, activation_max=-1, plt_num=1):
    """Plot every feature map of a conv-layer activation for one input image.

    Parameters
    ----------
    image_input : np.ndarray
        Preprocessed input batch of shape (1, 32, 32, 3); must already be
        resized/normalized the same way as the training data.
    tf_activation : tf.Tensor
        The activation tensor to visualise (e.g. a conv layer's BiasAdd).
    X_ph : tf.Tensor
        The input placeholder of the network.
    activation_min, activation_max : number, optional
        Optional vmin/vmax for the gray-scale display; -1 means "not set".
    plt_num : int, optional
        Matplotlib figure number to draw into.
    """
    # NOTE(review): only X_ph is fed here, so tf_activation must not depend on
    # the dropout keep-probability placeholder (true for the conv BiasAdd
    # tensors visualised below) -- confirm before using with other tensors.
    sess = tf.get_default_session()
    activation = tf_activation.eval(session=sess, feed_dict={X_ph: image_input})
    featuremaps = activation.shape[3]
    plt.figure(plt_num, figsize=(15, 15))
    for featuremap in range(featuremaps):
        plt.subplot(6, 8, featuremap+1)  # up to 6x8 feature maps per figure
        plt.title('FeatureMap ' + str(featuremap))  # displays the feature map number
        # Bug fix: the original used the bitwise '&', which binds tighter than
        # '!=' and silently turned this test into the chained comparison
        # 'activation_min != (-1 & activation_max) != -1'; 'and' is intended.
        if activation_min != -1 and activation_max != -1:
            plt.imshow(activation[0, :, :, featuremap], interpolation="nearest", vmin=activation_min, vmax=activation_max, cmap="gray")
        elif activation_max != -1:
            plt.imshow(activation[0, :, :, featuremap], interpolation="nearest", vmax=activation_max, cmap="gray")
        elif activation_min != -1:
            plt.imshow(activation[0, :, :, featuremap], interpolation="nearest", vmin=activation_min, cmap="gray")
        else:
            plt.imshow(activation[0, :, :, featuremap], interpolation="nearest", cmap="gray")
# Visualise the conv-layer activations for the "keep left" image. The conv
# outputs got no explicit names when the graph was built, so they are fetched
# by TensorFlow's default names for the bias-add op of each conv layer.
keepLeftImgFM = np.zeros((1,32,32,3), dtype = np.float32)
keepLeftImgFM[0,:,:,:] = keepLeftImgResized
with tf.Session() as session:
layer1out = tf.get_default_graph().get_tensor_by_name("BiasAdd:0")
layer2out = tf.get_default_graph().get_tensor_by_name("BiasAdd_1:0")
layer3out = tf.get_default_graph().get_tensor_by_name("BiasAdd_2:0")
saver.restore(session, saveFile)
outputFeatureMap(keepLeftImgFM, layer1out, X, plt_num = 1)
outputFeatureMap(keepLeftImgFM, layer2out, X, plt_num = 2)
outputFeatureMap(keepLeftImgFM, layer3out, X, plt_num = 3)
# Repeat the feature-map visualisation for the remaining four new images.
# Each resized image becomes a single-image batch of shape (1, 32, 32, 3).
zone30ImgFM = zone30ImgResized[np.newaxis, :, :, :].astype(np.float32)
stopSnowImgFM = stopSnowImgResized[np.newaxis, :, :, :].astype(np.float32)
flyingDeerImgFM = flyingDeerImgResized[np.newaxis, :, :, :].astype(np.float32)
electronic60ImgFM = electronic60ImgResized[np.newaxis, :, :, :].astype(np.float32)
# Default TF names of the three conv layers' bias-add outputs.
convLayerNames = ("BiasAdd:0", "BiasAdd_1:0", "BiasAdd_2:0")
for featureMapInput in (zone30ImgFM, stopSnowImgFM, flyingDeerImgFM, electronic60ImgFM):
    with tf.Session() as session:
        saver.restore(session, saveFile)
        for figureNumber, tensorName in enumerate(convLayerNames, start = 1):
            convOut = tf.get_default_graph().get_tensor_by_name(tensorName)
            outputFeatureMap(featureMapInput, convOut, X, plt_num = figureNumber)
By getting the feature maps for every one of the new images, it can be seen that the network does very well at identifying the important patterns in layer 1. The outputs of layers 2 and 3 are not human-readable anymore, since the resolution of the images is very low and the network is looking for very specific features.